
* Set the working directory to the replication-package root. 
* All data paths used below ("data/raw/...", "data/state/...") are relative to it. 
* NOTE(review): the relative path assumes the do-file is launched from a sibling 
* folder of "replication-package" -- confirm before running. 
cd "../replication-package"

*----------------------------------------------*
**# ---------- 1. Family wage gap ---------- #** 
*----------------------------------------------*

/* 

  The goal of this file is to calculate the gender wage gap. 
  
  1. Adjusted for missing values and top-codes
  2. Generated hourly wages based on weekly working hours and weeks worked last year. 
     Only those who had larger than zero earnings, worked more than zero hours and weeks are included. 
  3. Hourly wages were top-coded; following C&W 2014. 
  4. Although the truncation thresholds are defined in 1979 USD (converted via 1999 USD), 
     the resulting hourly wages are expressed in 2019 USD. 
     This is not relevant to the raw hourly wage gap, but it might affect the logged hourly wage gap. 
	 

*/ 


* Load the CPS extract and restrict it to the survey years analysed 
use "data/raw/cps_00088.dta", clear
tab asecflag // inspect the sample flag: the extract should contain ASEC records only 
drop if !inrange(year, 1983, 2021) // 'last year' earnings 
//drop if year > 1987 & srcearn ~=1 // srcearn was started to be collected from 1988. 

* Attach the year-specific top-coding values; only matched records are kept 
merge m:1 year using "data/ipums_topcoding.dta", gen(m_topcode)
tab year m_topcode // only pre-1983 not matched
drop if m_topcode!=3 
drop m_topcode 

*** Age restriction 
* Indicator equal to 1 for ages 18 through 65 (0 otherwise, including missing age) 
gen agesamp_1865 = (age>=18 & age<=65)

*** Program: cpsincome 
*** Builds hourly earnings measures (in 2019 USD) from the CPS wage-income variables 
*** of the dataset currently in memory. 
***
*** Variables read : incwage inclongj oincwage, incwage_top inclongj_top oincwage_top, 
***                  year, wkswork1, uhrsworkly, srcearn, cpi99 
*** Variables made : tc_incwage tc_inclongj tc_oincwage, hrly_earn hrly_earn99 hrly_earn19, 
***                  hrearn_v1 hrearn_v2 hrearn_v3, hrearn_1979 
*** Side effects   : - uhrsworkly is recoded in place (999 -> missing) 
***                  - year is shifted from the survey year to the earnings year (year - 1) 
***                  - uses the user-written command -fre- 
*** NOTE: this program needs the top-code datafile. merge the dataset before running 
cap program drop cpsincome 
program define cpsincome 

	// CPI conversion factors (see https://cps.ipums.org/cps/cpi99.shtml) 
	// Kept in one place so that the conversion to 1979 USD and back uses 
	// exactly the same numbers in both directions. 
	local usd99_to_usd19 1.535 // 1999 USD -> 2019 USD 
	local usd79_to_usd99 2.295 // 1979 USD -> 1999 USD 

	*** 1. Top-coding & CPI adjustment (universal to all specifications)

	// NOTE(review): only 99999999 is treated as missing here; confirm that the 
	// extract contains no other special codes (e.g. a separate 'missing' code). 
	foreach incvar in incwage inclongj oincwage {
		recode `incvar' (99999999=.), gen(tc_`incvar') // 99999999 = NIU 
	}

	// Survey years up to 1987: top-coded wage income is multiplied by 1.5 
	replace tc_incwage = tc_incwage*1.5 if tc_incwage>=incwage_top & tc_incwage!=. & year<=1987
	// 'incwage_top' given by the merged data with top-coding values 
	//sum tc_incwage if year<=1987

	// 1988 and onward, 'incwage' is the sum of inclongj and oincwage 
	// https://cps.ipums.org/cps-action/variables/INCWAGE#description_section
	// Each component is top-code adjusted separately and then summed 
	// (the sum is missing whenever either component is missing). 
	foreach incvar in inclongj oincwage {
		replace tc_`incvar' = tc_`incvar'*1.5 if tc_`incvar'>=`incvar'_top & tc_`incvar'!=. & year>=1988	
	}
	replace tc_incwage = tc_inclongj + tc_oincwage if year>=1988

	sum tc_*

	*** Calculate hourly earnings 
	// weeks worked last year? 
	tab wkswork1 srcearn, m
		//: note that weeks worked include non-wage working 
	
	// usual hours worked last year? 
	recode uhrsworkly (999=.)
	bys srcearn: sum uhrsworkly if inrange(srcearn, 1, 4)
		//: note that usual hours worked include non-wage working 

	// calculate hourly earnings 
	// (zero weeks or zero hours give a zero denominator, i.e. a missing value) 
	gen hrly_earn = tc_incwage / (wkswork1*uhrsworkly)
	sum hrly_earn

	// CPI adjustment 
	// use 2019 USD, not 2020 USD 
	gen hrly_earn99 = hrly_earn*cpi99
	gen hrly_earn19 = hrly_earn99*`usd99_to_usd19'
	sum hrly_earn*

	// make sure we're using actual year earnings obtained 
	// (from here on, 'year' is the earnings year = survey year - 1) 
	gen earn_year = year - 1 
	drop year 
	rename earn_year year 

	*** 2. Specifications 
	tab year srcearn // only available from 1987 

	// 1) all hrly earnings 
	gen hrearn_v1 = hrly_earn19 
	label var hrearn_v1 "Hourly earnings in 2019 USD, ALL"

	// 2) only include wage/salary 
	// (source of income from the longest job was wage/salary)
	gen hrearn_v2 = hrly_earn19
	fre srcearn
	replace hrearn_v2 = . if srcearn!=1 & year>=1987
	label var hrearn_v2 "Hourly earnings in 2019 USD, WAGE/SALARY ONLY"

	// 3) only include wage/salary + extreme values adjusted 
	// truncating below $1 and above $100 in 1979 USD 
	// following Cha and Weeden 2014
	// also dropping zero earners 
	gen hrearn_v3 = hrearn_v2
	// Convert 2019 USD -> 1999 USD -> 1979 USD with the exact inverse of the 
	// factors used for the way back, so that values that are not truncated 
	// return unchanged (rounded reciprocals would rescale every value). 
	gen hrearn_1979 = hrearn_v3 / (`usd79_to_usd99'*`usd99_to_usd19') 
		//: first convert to 1999 USD, and then 1979 USD 
		//: https://cps.ipums.org/cps/cpi99.shtml
	replace hrearn_1979 = 1 if hrearn_1979<=1 & hrearn_1979>0 
	replace hrearn_1979 = 100 if hrearn_1979>=100 & hrearn_1979!=. 
	replace hrearn_v3 = hrearn_1979*`usd79_to_usd99'*`usd99_to_usd19' 
		//: convert first back to 1999 USD, and then 2019 USD 
	replace hrearn_v3 = . if srcearn!=1 & year>=1987
	replace hrearn_v3 = . if hrearn_v3==0 
	sum hrearn_v3, det 

	label var hrearn_v3 "Hourly earnings in 2019 USD, WAGE/SALARY ONLY & TRUNCATED"

	// all measures 
	sum hrearn_v*
end


* Build one individual-level hourly-wage file per age sample. 
* The full extract stays in memory (preserve/restore); the processed 
* subsample is written to the tempfile ASEC_<age>. 
foreach age of numlist 1865 {

	preserve 

	// restrict to the age sample flagged by agesamp_<age> 
	drop if agesamp_`age'!=1 

	// derive the hourly wage measures 
	cpsincome 

	label data "ASEC wage for age `age', earned years 1982-2020 (1983-2021 interview years)"

	tempfile ASEC_`age'
	save "`ASEC_`age''"

	restore 

}


* 5. Weighted median hourly wages by state, year and sex, and the resulting 
*    male-to-female ratios (one row per state-year) 
foreach age of numlist 1865 {

	use "`ASEC_`age''", clear 
	recode sex (1=0) (2=1), gen(fem)
	gen n = 1
	tab age 

	// weighted medians of the three wage measures, all people (`age' yrs old) 
	collapse (sum) obs`age'_all = n ///
			 (p50) hrearn = hrearn_v1 ///
			 (p50) hrwage = hrearn_v2 ///
			 (p50) hrwage_trc = hrearn_v3 [pw = asecwt], by(statefip year fem) 
	sort statefip year fem

	// male-to-female wage/earnings ratio: 
	// within a state-year the fem==0 row comes first, so row / next row = male / female 
	foreach spec in "earngap hrearn" "wagegap hrwage" "trcwagegap hrwage_trc" {
		local ratio  : word 1 of `spec'
		local median : word 2 of `spec'
		by statefip year (fem): gen `ratio'`age'_a = `median' / `median'[_n+1] 
	}

	label var earngap`age'_a "Hourly earnings ratio for ALL, age `age'"
	label var wagegap`age'_a "Hourly wage ratio for ALL, age `age'"
	label var trcwagegap`age'_a "(Truncated) Hourly wage ratio for ALL, age `age'"

	// keep the first row of each state-year, which carries the ratio 
	by statefip year (fem): keep if _n==1 
	keep statefip year *_a 
	tempfile all`age'
	save "`all`age''"

	// state abbreviations as value labels on a copy of the FIPS code 
	sort statefip year 
	gen state_a = statefip
	label define state_a ///
			 1 "AL"  2 "AK"  4 "AZ"  5 "AR"  6 "CA"  8 "CO"  9 "CT" 10 "DE" 11 "DC" ///
			12 "FL" 13 "GA" 15 "HI" 16 "ID" 17 "IL" 18 "IN" 19 "IA" 20 "KS" 21 "KY" ///
			22 "LA" 23 "ME" 24 "MD" 25 "MA" 26 "MI" 27 "MN" 28 "MS" 29 "MO" 30 "MT" ///
			31 "NE" 32 "NV" 33 "NH" 34 "NJ" 35 "NM" 36 "NY" 37 "NC" 38 "ND" 39 "OH" ///
			40 "OK" 41 "OR" 42 "PA" 44 "RI" 45 "SC" 46 "SD" 47 "TN" 48 "TX" 49 "UT" ///
			50 "VT" 51 "VA" 53 "WA" 54 "WV" 55 "WI" 56 "WY", replace
	label val state_a state_a
	order state_a, after(statefip)
	label var state_a "Abbreviated State Names"

	// first and last earnings year, used in the dataset label 
	summarize year, meanonly 

	label data "State-level wage gap using ASEC age `age', `r(min)'-`r(max)'"
	save "state_wagegap`age'.dta", replace 

}

* Final output: state-level wage gap for earnings years 1982-2020 
use "state_wagegap1865.dta", clear 
sort statefip year 
keep if inrange(year, 1982, 2020)

save "data/state/01_wagegap_cps_19822020.dta", replace 

* Remove the intermediate file written to the working directory. 
* -erase- is used because it works on every platform; -rm- is only a 
* Mac/Unix synonym and stops the script on Windows. 
erase "state_wagegap1865.dta" 







